<!DOCTYPE html>



  


<html class="theme-next muse use-motion" lang="zh-Hans">
<head>
  <meta charset="UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<!-- No maximum-scale: pinch-zoom stays available so users can enlarge the text. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="theme-color" content="#222">









<meta http-equiv="Cache-Control" content="no-transform" />
<meta http-equiv="Cache-Control" content="no-siteapp" />
















  
  
  <link href="/lib/fancybox/source/jquery.fancybox.css?v=2.1.5" rel="stylesheet" type="text/css" />







<link href="/lib/font-awesome/css/font-awesome.min.css?v=4.6.2" rel="stylesheet" type="text/css" />

<link href="/css/main.css?v=5.1.4" rel="stylesheet" type="text/css" />


  <link rel="apple-touch-icon" sizes="180x180" href="/images/apple-touch-icon-next.png?v=5.1.4">


  <link rel="icon" type="image/png" sizes="32x32" href="/images/favicon-32x32-next.png?v=5.1.4">


  <link rel="icon" type="image/png" sizes="16x16" href="/images/favicon-16x16-next.png?v=5.1.4">


  <link rel="mask-icon" href="/images/logo.svg?v=5.1.4" color="#222">





  <meta name="keywords" content="Hexo, NexT" />










<meta name="description" content="Thanks for visiting my blog, lucky one!!!">
<meta property="og:type" content="website">
<meta property="og:title" content="Lai&#39;s blog">
<meta property="og:url" content="http://yoursite.com/index.html">
<meta property="og:site_name" content="Lai&#39;s blog">
<meta property="og:description" content="Thanks for visiting my blog, lucky one!!!">
<meta property="og:locale" content="zh-Hans">
<meta name="twitter:card" content="summary">
<meta name="twitter:title" content="Lai&#39;s blog">
<meta name="twitter:description" content="Thanks for visiting my blog, lucky one!!!">



<script type="text/javascript" id="hexo.configurations">
  // Theme namespace object; keeps an existing window.NexT when one is already defined.
  var NexT = window.NexT || {};
  // Page-level settings emitted as a global object literal.
  // NOTE(review): presumably read by the theme scripts under /js/src/ loaded at the end of <body> — confirm.
  var CONFIG = {
    root: '/',
    scheme: 'Muse',
    version: '5.1.4',
    sidebar: {"position":"left","display":"post","offset":12,"b2t":false,"scrollpercent":false,"onmobile":false},
    fancybox: true,
    tabs: true,
    motion: {"enable":true,"async":false,"transition":{"post_block":"fadeIn","post_header":"slideDownIn","post_body":"slideDownIn","coll_header":"slideLeftIn","sidebar":"slideUpIn"}},
    // NOTE(review): userId '0' looks like a placeholder for an unconfigured comment service — confirm.
    duoshuo: {
      userId: '0',
      author: '博主'
    },
    // Search settings: applicationID, apiKey and indexName are all empty strings on this page.
    algolia: {
      applicationID: '',
      apiKey: '',
      indexName: '',
      hits: {"per_page":10},
      labels: {"input_placeholder":"Search for Posts","hits_empty":"We didn't find any results for the search: ${query}","hits_stats":"${hits} results found in ${time} ms"}
    }
  };
</script>



  <link rel="canonical" href="http://yoursite.com/"/>





  <title>Lai's blog</title>
  








</head>

<body itemscope itemtype="http://schema.org/WebPage" lang="zh-Hans">

  
  
    
  

  <div class="container sidebar-position-left 
  page-home">
    <div class="headband"></div>

    <header id="header" class="header" itemscope itemtype="http://schema.org/WPHeader">
      <div class="header-inner"><div class="site-brand-wrapper">
  <div class="site-meta ">
    

    <div class="custom-logo-site-title">
      <a href="/"  class="brand" rel="start">
        <span class="logo-line-before"><i></i></span>
        <span class="site-title">Lai's blog</span>
        <span class="logo-line-after"><i></i></span>
      </a>
    </div>
      
        <p class="site-subtitle"></p>
      
  </div>

  <div class="site-nav-toggle">
    <!-- type="button" marks this as an in-page control; aria-label names the icon-only toggle for assistive technology. -->
    <button type="button" aria-label="切换导航栏">
      <span class="btn-bar"></span>
      <span class="btn-bar"></span>
      <span class="btn-bar"></span>
    </button>
  </div>
</div>

<nav class="site-nav">
  

  
    <ul id="menu" class="menu">
      
        
        <li class="menu-item menu-item-home">
          <a href="/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-home"></i> <br />
            
            首页
          </a>
        </li>
      
        
        <li class="menu-item menu-item-tags">
          <a href="/tags/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-tags"></i> <br />
            
            标签
          </a>
        </li>
      
        
        <li class="menu-item menu-item-categories">
          <a href="/categories/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-th"></i> <br />
            
            分类
          </a>
        </li>
      
        
        <li class="menu-item menu-item-archives">
          <a href="/archives/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-archive"></i> <br />
            
            归档
          </a>
        </li>
      

      
    </ul>
  

  
</nav>



 </div>
    </header>

    <main id="main" class="main">
      <div class="main-inner">
        <div class="content-wrap">
          <div id="content" class="content">
            
  <section id="posts" class="posts-expand">
    
      

  

  
  
  

  <article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  
  
  
  <div class="post-block">
    <link itemprop="mainEntityOfPage" href="http://yoursite.com/2018/06/28/Multiclass Support Vector Machine/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="name" content="Lai">
      <meta itemprop="description" content="">
      <meta itemprop="image" content="/images/avatar.gif">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="Lai's blog">
    </span>

    
      <header class="post-header">

        
        
          <h1 class="post-title" itemprop="name headline">
                
                <a class="post-title-link" href="/2018/06/28/Multiclass Support Vector Machine/" itemprop="url">Multiclass Support Vector Machine</a></h1>
        

        <div class="post-meta">
          <span class="post-time">
            
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              
                <span class="post-meta-item-text">发表于</span>
              
              <time title="创建于" itemprop="dateCreated datePublished" datetime="2018-06-28T10:39:00+08:00">
                2018-06-28
              </time>
            

            

            
          </span>

          

          
            
          

          
          

          

          

          

        </div>
      </header>
    

    
    
    
    <div class="post-body" itemprop="articleBody">

      
      

      
        
          
            <p>多分类支持向量机可以用来处理多分类问题，其分类函数可以给每一个类得到一个分数值，从而从数值上了解哪个类别的分数高低，其损失函数也是通过某个特定的阈值来优化从而使得模型在正确类别上得到一个比其他类别更高的分数值。</p>
<h4 id="loss函数-设定有C个类别"><a href="#loss函数-设定有C个类别" class="headerlink" title="loss函数(设定有C个类别)"></a>loss函数(设定有C个类别)</h4><p>$$s = [s_1, s_2, \dots, s_C] = f(x_i; W) = [f(x_i; W)_1, f(x_i; W)_2, \dots ,f(x_i; W)_C]$$<br>$$L_i = \sum_{j\neq y_i} \max(0, s_j - s_{y_i} + \Delta)$$<br>$$L =  \frac{1}{N} \sum_i L_i  +  \lambda R(W)$$<br>$$L = \frac{1}{N} \sum_i \sum_{j\neq y_i} \left[ \max(0, f(x_i; W)_j - f(x_i; W)_{y_i} + \Delta) \right] + \lambda \sum_k\sum_l W_{k,l}^2$$</p>
<h4 id="梯度下降"><a href="#梯度下降" class="headerlink" title="梯度下降"></a>梯度下降</h4><p>$$s = W^Tx_i$$<br>$$对正确类别 y_i 对应的权重求导：\frac{dL_i}{dW_{y_i}} = -\left(\sum_{j\neq y_i}\mathbb{I}(W^T_jx_i - W^T_{y_i}x_i + \Delta&gt;0)\right)x_i$$<br>$$对其他类别 j\neq y_i 对应的权重求导：\frac{dL_i}{dW_j} = \mathbb{I}(W^T_jx_i - W^T_{y_i}x_i + \Delta&gt;0)\,x_i$$<br>$$\frac{dL}{dW} = \frac{1}{N}\sum^N_{i=1}\frac{dL_i}{dW} + 2\lambda W$$<br>$$执行 W = W - \alpha \frac{dL}{dW}，再根据W求s，然后求loss，最后优化得到较理想的W值$$</p>

          
        
      
    </div>
    
    
    

    

    

    

    <footer class="post-footer">
      

      

      

      
      
        <div class="post-eof"></div>
      
    </footer>
  </div>
  
  
  
  </article>


    
      

  

  
  
  

  <article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  
  
  
  <div class="post-block">
    <link itemprop="mainEntityOfPage" href="http://yoursite.com/2018/04/28/bacth-normlization/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="name" content="Lai">
      <meta itemprop="description" content="">
      <meta itemprop="image" content="/images/avatar.gif">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="Lai's blog">
    </span>

    
      <header class="post-header">

        
        
          <h1 class="post-title" itemprop="name headline">
                
                <a class="post-title-link" href="/2018/04/28/bacth-normlization/" itemprop="url">batch normalization</a></h1>
        

        <div class="post-meta">
          <span class="post-time">
            
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              
                <span class="post-meta-item-text">发表于</span>
              
              <time title="创建于" itemprop="dateCreated datePublished" datetime="2018-04-28T16:38:00+08:00">
                2018-04-28
              </time>
            

            

            
          </span>

          

          
            
          

          
          

          

          

          

        </div>
      </header>
    

    
    
    
    <div class="post-body" itemprop="articleBody">

      
      

      
        
          
            <h2 id="bacth-normlization中的前向传播"><a href="#bacth-normlization中的前向传播" class="headerlink" title="batch normalization中的前向传播"></a>batch normalization中的前向传播</h2><p>input:X_{i}(本层所有的样本矩阵为X，维度为mxD，m为样本数，D为神经元的个数，其中X_{i}为X中的第i个样本，X_{ij}为该样本的第j个分量)<br>$$其中样本矩阵X = \begin{bmatrix}<br> X_{11}&amp; X_{12} &amp; … &amp;X_{1D} \\<br>X_{21}&amp; X_{22} &amp; … &amp;X_{2D} \\<br> .&amp; . &amp;  …&amp;. \\<br> .&amp;  .&amp; … &amp; .\\<br>X_{m1}&amp; X_{m2} &amp; … &amp;X_{mD}<br>\end{bmatrix},X_{i} = \begin{bmatrix}<br> X_{i1}&amp; X_{i2} &amp; … &amp;X_{iD} \\<br>\end{bmatrix},第j列为X_{\cdot j} = \begin{bmatrix}<br> X_{1j}\\<br>X_{2j}\\<br> .\\<br> .\\<br>X_{mj}<br>\end{bmatrix}$$<br>$$ output格式为: Y_{i} = BN(X_{i}, \gamma, \beta)\\<br>其中输出矩阵Y = \begin{bmatrix}<br> Y_{11}&amp; Y_{12} &amp; … &amp;Y_{1D} \\<br>Y_{21}&amp; Y_{22} &amp; … &amp;Y_{2D} \\<br> .&amp; . &amp;  …&amp;. \\<br> .&amp;  .&amp; … &amp; .\\<br>Y_{m1}&amp; Y_{m2} &amp; … &amp;Y_{mD}<br>\end{bmatrix}，m为样本数，D为输入维度数$$<br>前向传播过程如下<br>$$\mu = E(X) = \frac{1}{m}\sum_{i=1}^{m}X_{i}，则\mu的维度为(1,D)，其第j个分量记为\mu_{j}\\<br>\sigma^{2} = Var(X) = \frac{1}{m}\sum_{i=1}^{m}(X_{i}-\mu)^{2}，则\sigma^{2}的维度为(1,D)，其第j个分量记为\sigma_{j}^{2}\\<br>对第i个样本的估计\hat{X_{ij}} = \frac{X_{ij} - \mu_{j}}{\sqrt{\sigma_{j}^{2} + \varepsilon }}，则\hat{X_{i}}维度为(1,D)\\<br>对m个样本估计\hat{X} = \frac{X - \mu}{\sqrt{\sigma^{2} + \varepsilon }}，则\hat{X}维度为(m,D)\\<br>第i个样本的输出Y_{i} = \gamma  \times \hat{X_{i}} + \beta，其中\gamma维度为(1,D),\beta维度为(1,D),则Y_{i}维度为(1,D)\\<br>则m个样本的输出Y = \gamma  \times \hat{X} + \beta，其中\gamma维度为(1,D),\beta维度为(1,D),则Y维度为(m,D)$$</p>
<h2 id="bacth-normlization中的反向传播"><a href="#bacth-normlization中的反向传播" class="headerlink" title="batch normalization中的反向传播"></a>batch normalization中的反向传播</h2><p>$$假设上层梯度为\frac{dL}{dY}，本层输入为X，过程如下：\\<br>从X中选中第j列，令x = \begin{bmatrix}<br> X_{1j}\\<br>X_{2j}\\<br> .\\<br> .\\<br>X_{mj}<br>\end{bmatrix}\\$$</p>
<h4 id="1-求dbeta"><a href="#1-求dbeta" class="headerlink" title="(1)求dbeta"></a>(1)求dbeta</h4><p>$$我们先求d\beta_{j} = \sum_{i=1}^{m}\frac{dL}{dY_{ij}} \cdot \frac{dY_{ij}}{d\beta_{j}}，<br>由Y_{i} = \gamma  \times \hat{X_{i}} + \beta可知Y_{ij} = \gamma_{j}  \times \hat{X_{ij}} + \beta_{j}，\frac{dY_{ij}}{d\beta_{j}}=1\\<br>所以d\beta_{j} = \sum_{i=1}^{m}\frac{dL}{dY_{ij}}，则对整个矩阵操作d\beta = \sum_{i=1}^{m}\frac{dL}{dY_{i}} = \begin{bmatrix}<br> d\beta_{1}&amp; d\beta_{2} &amp; … &amp;d\beta_{D} \\<br>\end{bmatrix}$$</p>
<h4 id="2-求dgamma"><a href="#2-求dgamma" class="headerlink" title="(2)求dgamma"></a>(2)求dgamma</h4><p>$$<br>d\gamma_{j} = \sum_{i=1}^{m}\frac{dL}{dY_{ij}} \cdot \frac{dY_{ij}}{d\gamma_{j}}\\<br>由Y_{i} = \gamma  \times \hat{X_{i}} + \beta可知Y_{ij} = \gamma_{j}  \times \hat{X_{ij}} + \beta_{j}，\frac{dY_{ij}}{d\gamma_{j}}=\hat{X_{ij}}\\<br>所以d\gamma_{j} = \sum_{i=1}^{m}\frac{dL}{dY_{ij}} \cdot \hat{X_{ij}}\\<br>对整个矩阵进行操作d\gamma = \sum_{i=1}^{m}\frac{dL}{dY_{i}} \cdot \hat{X_{i}} = \begin{bmatrix} d\gamma_{1}&amp; d\gamma_{2} &amp; … &amp;d\gamma_{D}<br>\end{bmatrix}$$</p>
<h4 id="3-求dX"><a href="#3-求dX" class="headerlink" title="(3)求dX"></a>(3)求dX</h4><p>最后我们还要对X进行求导，首先我们先看下面的链式路径：<br><img src="/images/chain_batchnorm.png" alt="batch normalization反向传播的链式求导路径图"><br>$$对第i行第j列进行反向传播：\frac{dL}{dX_{ij}} = \sum_{k=1}^{m}\frac{dL}{d\hat{X_{kj}}} \cdot\frac{d\hat{X_{kj}}}{dX_{ij}} = \sum_{k=1}^{m} \sum_{l=1}^{m}(\frac{dL}{dY_{lj}} \cdot \frac{dY_{lj}}{d\hat{X_{kj}}})\cdot\frac{d\hat{X_{kj}}}{dX_{ij}}\\<br>由Y_{i} = \gamma  \times \hat{X_{i}} + \beta可知Y_{lj} = \gamma_{j}  \times \hat{X_{lj}} + \beta_{j}，则\frac{dY_{lj}}{d\hat{X_{kj}}}=\gamma_{j}(当l=k时)，\frac{dY_{lj}}{d\hat{X_{kj}}}=0(当l≠k时)\\<br>则\frac{dL}{dX_{ij}} = \sum_{k=1}^{m}\frac{dL}{dY_{kj}} \cdot \frac{dY_{kj}}{d\hat{X_{kj}}} \cdot\frac{d\hat{X_{kj}}}{dX_{ij}} = \sum_{k=1}^{m}\gamma_{j} \cdot \frac{dL}{dY_{kj}} \cdot\frac{d\hat{X_{kj}}}{dX_{ij}} \\<br>由\hat{X_{ij}} = \frac{X_{ij} - \mu_{j}}{\sqrt{\sigma_{j}^{2} + \varepsilon }}，\mu_{j} = \frac{1}{m}\sum_{k=1}^{m}X_{kj}，<br>\sigma_{j}^{2} = \frac{1}{m}\sum_{k=1}^{m}(X_{kj}-\mu_{j})^{2}和上图可知我们求\frac{d\hat{X_{kj}}}{dX_{ij}}的话有三条路径:\\<br>第一条路径为：\frac{d\hat{X_{kj}}}{dX_{ij}} = \mathbb{I}(k=i) \cdot \frac{1}{\sqrt{\sigma_{j}^{2} + \varepsilon }}\\<br>第二条路径为：\frac{d\hat{X_{kj}}}{dX_{ij}} = \frac{d\hat{X_{kj}}}{d\mu_{j}} \cdot \frac{d\mu_{j}}{dX_{ij}}，\frac{d\hat{X_{kj}}}{d\mu_{j}} = -\frac{1}{\sqrt{\sigma_{j}^{2} + \varepsilon }},\frac{d\mu_{j}}{dX_{ij}} = \frac{1}{m}\\<br>则\frac{d\hat{X_{kj}}}{dX_{ij}} = -\frac{1}{m\sqrt{\sigma_{j}^{2} + \varepsilon }}\\<br>第三条路径为：\frac{d\hat{X_{kj}}}{dX_{ij}} = \frac{d\hat{X_{kj}}}{d\sigma_{j}^{2}} \cdot \frac{d\sigma_{j}^{2}}{dX_{ij}}，\\<br>\frac{d\hat{X_{kj}}}{d\sigma_{j}^{2}} = -\frac{X_{kj} - \mu_{j}}{2(\sigma_{j}^{2} + \varepsilon)^{\frac{3}{2}}}，\\<br>求解\frac{d\sigma_{j}^{2}}{dX_{ij}}有两个路径：<br>路径1（直接路径）：\frac{\partial\sigma_{j}^{2}}{\partial X_{ij}} = \frac{2}{m}(X_{ij} - \mu_{j})，路径2（经过\mu_{j}）：\frac{d\sigma_{j}^{2}}{d\mu_{j}} \cdot \frac{d\mu_{j}}{dX_{ij}} = \left(-\frac{2}{m}\sum_{k=1}^{m}(X_{kj} - \mu_{j})\right) \cdot 
\frac{1}{m} = 0（因为\sum_{k=1}^{m}(X_{kj} - \mu_{j}) = 0）\\<br>则\frac{d\sigma_{j}^{2}}{dX_{ij}} = \frac{\partial\sigma_{j}^{2}}{\partial X_{ij}} + \frac{d\sigma_{j}^{2}}{d\mu_{j}} \cdot \frac{d\mu_{j}}{dX_{ij}} = \frac{2}{m}(X_{ij} - \mu_{j}) + 0 = \frac{2}{m}(X_{ij} - \mu_{j})\\<br>则\frac{d\hat{X_{kj}}}{dX_{ij}} = \frac{d\hat{X_{kj}}}{d\sigma_{j}^{2}} \cdot \frac{d\sigma_{j}^{2}}{dX_{ij}} = -\frac{X_{kj} - \mu_{j}}{2(\sigma_{j}^{2} + \varepsilon)^{\frac{3}{2}}} \cdot \frac{2}{m}(X_{ij} - \mu_{j}) = -\frac{(X_{kj} - \mu_{j}) \cdot(X_{ij} - \mu_{j})}{m\cdot (\sigma_{j}^{2} + \varepsilon)^{\frac{3}{2}}}\\<br>综合上述三条路径可求得\frac{dL}{dX_{ij}} = \sum_{k=1}^{m}\gamma_{j}\cdot\frac{dL}{dY_{kj}}\cdot\frac{\partial\hat{X_{kj}}}{\partial X_{ij}} + \sum_{k=1}^{m}\gamma_{j}\cdot\frac{dL}{dY_{kj}}\cdot\frac{d\hat{X_{kj}}}{d\mu_{j}} \cdot \frac{d\mu_{j}}{dX_{ij}} +\sum_{k=1}^{m}\gamma_{j}\cdot\frac{dL}{dY_{kj}}\cdot \frac{d\hat{X_{kj}}}{d\sigma_{j}^{2}} \cdot \frac{d\sigma_{j}^{2}}{dX_{ij}}\\<br> = \gamma_{j}\cdot\frac{dL}{dY_{ij}}\cdot\frac{1}{\sqrt{\sigma_{j}^{2} + \varepsilon }} + \sum_{k=1}^{m}\gamma_{j}\cdot\frac{dL}{dY_{kj}}\cdot\frac{-1}{m\sqrt{\sigma_{j}^{2} + \varepsilon }} +\sum_{k=1}^{m}\gamma_{j}\cdot\frac{dL}{dY_{kj}}\cdot \frac{-(X_{kj} - \mu_{j}) \cdot(X_{ij} - \mu_{j})}{m\cdot (\sigma_{j}^{2} + \varepsilon)^{\frac{3}{2}}}\\<br> 对矩阵X进行整体操作:\\<br>\frac{dL}{dX} = \gamma\cdot\frac{dL}{dY}\cdot\frac{1}{\sqrt{\sigma^{2} + \varepsilon }} + \sum_{k=1}^{m}\gamma\cdot\frac{dL}{dY_{k}}\cdot\frac{-1}{m\sqrt{\sigma^{2} + \varepsilon }} +\sum_{k=1}^{m}\gamma\cdot\frac{dL}{dY_{k}}\cdot(X_{k} - \mu)\cdot \frac{ -(X - \mu)}{m\cdot (\sigma^{2} + \varepsilon)^{\frac{3}{2}}}\\<br>总结一下：\\<br>d\beta = \frac{dL}{d\beta} = \sum_{i=1}^{m}\frac{dL}{dY_{i}}\\<br>d\gamma = \frac{dL}{d\gamma} = \sum_{i=1}^{m}\frac{dL}{dY_{i}}\cdot \hat{X_{i}}\\<br>dX = \frac{dL}{dX} = \gamma\cdot\frac{dL}{dY}\cdot\frac{1}{\sqrt{\sigma^{2} + \varepsilon }} + 
\sum_{k=1}^{m}\gamma\cdot\frac{dL}{dY_{k}}\cdot\frac{-1}{m\sqrt{\sigma^{2} + \varepsilon }} +\sum_{k=1}^{m}\gamma\cdot\frac{dL}{dY_{k}}\cdot(X_{k} - \mu)\cdot \frac{ -(X - \mu)}{m\cdot (\sigma^{2} + \varepsilon)^{\frac{3}{2}}}\\<br>$$</p>

          
        
      
    </div>
    
    
    

    

    

    

    <footer class="post-footer">
      

      

      

      
      
        <div class="post-eof"></div>
      
    </footer>
  </div>
  
  
  
  </article>


    
      

  

  
  
  

  <article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  
  
  
  <div class="post-block">
    <link itemprop="mainEntityOfPage" href="http://yoursite.com/2018/04/22/neural_network/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="name" content="Lai">
      <meta itemprop="description" content="">
      <meta itemprop="image" content="/images/avatar.gif">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="Lai's blog">
    </span>

    
      <header class="post-header">

        
        
          <h1 class="post-title" itemprop="name headline">
                
                <a class="post-title-link" href="/2018/04/22/neural_network/" itemprop="url">全连接神经网络</a></h1>
        

        <div class="post-meta">
          <span class="post-time">
            
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              
                <span class="post-meta-item-text">发表于</span>
              
              <time title="创建于" itemprop="dateCreated datePublished" datetime="2018-04-22T20:14:00+08:00">
                2018-04-22
              </time>
            

            

            
          </span>

          

          
            
          

          
          

          

          

          

        </div>
      </header>
    

    
    
    
    <div class="post-body" itemprop="articleBody">

      
      

      
        
          
            <p>&emsp;&emsp; 在学完斯坦福大学的计算机视觉课程后，总结一下自己对全连接神经网络的理解，方便自己以后查阅和复习。首先简单地复习一下神经网络的概念，神经网络有输入层、隐藏层和输出层三种层，其中隐藏层可能会有多层，一个神经网络的层数等于隐藏层的个数加上输出层。神经网络的灵感来源于生物体大脑神经元的触发机制，但是我们要区分神经网络和真实生物体的差别。神经网络不是生物体神经元的真实映射。本篇文章以cs231n中的作业二中的神经网络作为背景进行讲述。</p>
<h2 id="1-神经网络中的基本结构"><a href="#1-神经网络中的基本结构" class="headerlink" title="1.神经网络中的基本结构"></a>1.神经网络中的基本结构</h2><p><img src="/images/neuralnetwork.png" alt="两层全连接神经网络结构示意图"><br>&emsp;&emsp; 如上图所示，神经网络由输入层、隐藏层和输出层组成，这个神经网络一共有两层，一个隐藏层和一个输出层，输入层不算层数。输入层的输入维度为3，第一层有四个神经元，输出层有两个神经元。在某些结构中，我们的神经网络结构会更加复杂，隐藏层可能会不止一个，而且每一层的神经元个数也不尽相同。其中每一个神经元有一个输入和一个输出，如下为一个神经元的内部详解：<br><img src="/images/activate.png" alt="单个神经元内部结构示意图"><br>一个神经元其实有两个处理，首先是对前面的输入做一个线性求和$$Z = \sum_{i=1}^{N} w_i \cdot x_i +b$$<br>然后再由一个激活函数f对Z做处理得到这个神经元的输出<br>$$f(\sum_{i=1}^{N} w_i \cdot x_i +b)$$<br>讲完了基本的神经网络结构后，我们现在以cs231n中作业2中的全连接神经网络架构做一个讲述，它的架构为{affine - [batch norm] - relu - [dropout]} x (L - 1) - affine - softmax，也就是说前面的L-1层的每一层，先做一个affine，然后batch norm，再接着激活函数用relu处理一下输出，最后做一个dropout，到最后一层就在一个affine后进入一个softmax层得到神经网络的最终输出。</p>
<h2 id="2-前向传播"><a href="#2-前向传播" class="headerlink" title="2.前向传播"></a>2.前向传播</h2><p>&emsp;&emsp; 前向传播是从输入层开始把每一层的输出递交给下一层直至最后一层将结果输出的过程，在cs231n中前面的L-1层的前向过程如下：<br>affine层对输入做一个线性组合输出affOut：<br>$$affOut = \sum_{i=1}^{N} w_i \cdot x_i +b$$<br>batch norm层：<br><img src="/images/batchnorm.png" alt="batch norm层"><br>得到输出batchOut<br>relu层：<br><img src="/images/relu.png" alt="relu层"><br>得到输出reluOut<br>dropout层：<br><img src="/images/dropout.png" alt="dropout层"><br>得到输出dropOut<br>&emsp;&emsp; 然后在第L层也就是最后一层先进入一个affine层，然后把结果送入一个softmax层得到各个类别的分类概率。<br>再对每个样本的softmax输出求loss得到最后的softmax loss，再加上正则化后为<br>$$L=\frac{1}{N}\sum_{i=1}^{N}L_i(W) + \lambda \cdot \sum_{l} \sum_{i}\sum_{j}(W_{ij}^{l})^{2}$$<br><figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment">#代码解释</span></span><br><span class="line">affOut,affCache = affine_forward(inputX, W, b)</span><br><span class="line">batchOut,batchCache = batchnorm_forward(affOut, gamma, beta, self.bn_params[i])</span><br><span class="line">reluOut,reluCache = relu_forward(batchOut)</span><br><span class="line">dropOut,dropCache = dropout_forward(reluOut, self.dropout_param)</span><br></pre></td></tr></table></figure></p>
<h2 id="3-反向传播"><a href="#3-反向传播" class="headerlink" title="3.反向传播"></a>3.反向传播</h2><p>&emsp;&emsp; 反向传播其实就是链式求导的一个应用，loss函数对最后一层输入的导数为：<br>$$dZL = \frac{dL}{dZ} = \frac{1}{N}\sum_{i=1}^{N}\frac{dL_i}{dZ_i}$$<br>到dropOut层反向传播（该层输入 reluOut， 输出 dropOut（当为L-1层的时候dropOut=ZL））：<br>$$\frac{dL}{dreluOut} = \frac{dL}{ddropOut} \cdot \frac{ddropOut}{dreluOut}$$<br>到relu层反向传播（该层输入 batchOut， 输出 reluOut）：<br>$$\frac{dL}{dbatchOut} = \frac{dL}{dreluOut} \cdot \frac{dreluOut}{dbatchOut}$$<br>到batch norm层（该层输入 affOut， 输出 batchOut）：<br>$$\frac{dL}{daffOut} = \frac{dL}{dbatchOut} \cdot \frac{dbatchOut}{daffOut}$$<br>$$d\gamma = \frac{dL}{d\gamma} = \frac{dL}{dbatchOut} \cdot \frac{dbatchOut}{d\gamma}$$<br>$$d\beta = \frac{dL}{d\beta} = \frac{dL}{dbatchOut} \cdot \frac{dbatchOut}{d\beta}$$<br>到affine层（该层输入 X， 输出 affOut）：<br>$$dX = \frac{dL}{dX} = \frac{dL}{daffOut} \cdot \frac{daffOut}{dX}$$<br>$$dW = \frac{dL}{dW} = \frac{dL}{daffOut} \cdot \frac{daffOut}{dW}$$<br>$$db = \frac{dL}{db} = \frac{dL}{daffOut} \cdot \frac{daffOut}{db}$$</p>
<h2 id="4-权重初始化"><a href="#4-权重初始化" class="headerlink" title="4.权重初始化"></a>4.权重初始化</h2><p><img src="/images/neuralnetwork.png" alt="image"><br>&emsp;&emsp; 继续拿这个神经网络来说明，在第一层和第二层中我们都需要对权重进行初始化，每一层的w的维度初始化为本层的输入个数和本层的神经元个数，例如上图中第一层w的维度为3x4的矩阵，b的维度为1x4，第二层权重w的维度为4x2，b的维度为1x2。batchnorm层中beta和gamma的维度都为1xD（D为该层神经元的个数）。<br>&emsp;&emsp; 一般w为从高斯分布中均值为0进行初始化，b初始化为0矩阵，beta初始化为0，gamma初始化为1</p>
<h2 id="5-神经网络中的梯度下降"><a href="#5-神经网络中的梯度下降" class="headerlink" title="5.神经网络中的梯度下降"></a>5.神经网络中的梯度下降</h2><p>对每一层：<br>$$\gamma = \gamma - \alpha \cdot d\gamma$$<br>$$\beta = \beta - \alpha \cdot d\beta$$<br>$$W = W - \alpha \cdot dW$$<br>$$b = b - \alpha \cdot db$$<br>&emsp;&emsp; 然后前向传播求出loss，当loss足够小或者迭代次数足够多的时候停止梯度下降，此时参数即为近似最优解</p>

          
        
      
    </div>
    
    
    

    

    

    

    <footer class="post-footer">
      

      

      

      
      
        <div class="post-eof"></div>
      
    </footer>
  </div>
  
  
  
  </article>


    
      

  

  
  
  

  <article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  
  
  
  <div class="post-block">
    <link itemprop="mainEntityOfPage" href="http://yoursite.com/2018/04/20/Softmax/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="name" content="Lai">
      <meta itemprop="description" content="">
      <meta itemprop="image" content="/images/avatar.gif">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="Lai's blog">
    </span>

    
      <header class="post-header">

        
        
          <h1 class="post-title" itemprop="name headline">
                
                <a class="post-title-link" href="/2018/04/20/Softmax/" itemprop="url">Softmax 梯度下降优化</a></h1>
        

        <div class="post-meta">
          <span class="post-time">
            
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              
                <span class="post-meta-item-text">发表于</span>
              
              <time title="创建于" itemprop="dateCreated datePublished" datetime="2018-04-20T13:21:00+08:00">
                2018-04-20
              </time>
            

            

            
          </span>

          

          
            
          

          
          

          

          

          

        </div>
      </header>
    

    
    
    
    <div class="post-body" itemprop="articleBody">

      
      

      
        
          
            <h2 id="softmax函数简介："><a href="#softmax函数简介：" class="headerlink" title="softmax函数简介："></a>softmax函数简介：</h2><p>&emsp;&emsp; softmax函数是用来处理多分类的一种软性分类器，它输出的是每个类别的概率值。数据集特征矩阵为X其维度为D+1 x N（其中D+1为原本样本的特征维度数D加上bias的维度后的维度数，所以为D+1，N为样本数），标注矩阵为Y，维度为C x N(其中C为类别数，N为样本数)。<br><img src="/images/1524209707955.png" alt="image"><br>&emsp;&emsp; 当我们给softmax输入一个样本Xi其输出格式为softmax(Xi) = [s1,s2,……,sC]，其中s1对应类别1的概率值，s2对应类别2的概率值，依次类推到sC。softmax的过程如下:<br><img src="/images/1524203488388.png" alt="image"><br>然后来衡量第i个样本的loss公式如下：<br><img src="/images/1524205811631.png" alt="image">（其中yi表示第i个样本对应的类别）<br>所以N个样本的loss为：<br><img src="/images/1524205761034.png" alt="image"><br>加上正则化后为：<br><img src="/images/1524449673188.png" alt="image"><br>由如下Z和W与X的关系：<br><img src="/images/1524368247375.png" alt="image"></p>
<p>则可以把loss函数化成全部关于w的函数为：<br><img src="/images/1524450300358.png" alt="image"><br>现在我们来求softmax的导数，现在我们先对一个样本的导数进行求解，先把Li化简为如下形式：<br><img src="/images/1524206564344.png" alt="image"><br>则当对Wyi求导的时候（j==yi）：<br><img src="/images/1524206933385.png" alt="image"><br>当对Wj求导的时候（j！=yi）<br><img src="/images/1524206992830.png" alt="image"><br>则如上操作可以求出单个loss的梯度如下(其中设yi=2)：<br><img src="/images/1524207290063.png" alt="image"><br>现在我们需要把所有的梯度求出来并做一个平均就得到了loss的平均梯度：<br><img src="/images/1524207442115.png" alt="image"><br>加上正则化后的loss函数：<br><img src="/images/1524207513644.png" alt="image"><br>然后在足够的迭代次数中用梯度更新W（其中α为学习率）：<br><img src="/images/1524207624590.png" alt="image"><br>直到在达到足够的迭代次数或者loss足够小的时候则停止更新<br>此时得到的W则为我们在这个softmax中所得到的W，然后在测试集中测试所有样本可得到样本的预测类别。</p>

          
        
      
    </div>
    
    
    

    

    

    

    <footer class="post-footer">
      

      

      

      
      
        <div class="post-eof"></div>
      
    </footer>
  </div>
  
  
  
  </article>


    
  </section>

  


          </div>
          


          

        </div>
        
          
  
  <div class="sidebar-toggle">
    <div class="sidebar-toggle-line-wrap">
      <span class="sidebar-toggle-line sidebar-toggle-line-first"></span>
      <span class="sidebar-toggle-line sidebar-toggle-line-middle"></span>
      <span class="sidebar-toggle-line sidebar-toggle-line-last"></span>
    </div>
  </div>

  <aside id="sidebar" class="sidebar">
    
    <div class="sidebar-inner">

      

      

      <section class="site-overview-wrap sidebar-panel sidebar-panel-active">
        <div class="site-overview">
          <div class="site-author motion-element" itemprop="author" itemscope itemtype="http://schema.org/Person">
            
              <img class="site-author-image" itemprop="image"
                src="/images/avatar.gif"
                alt="Lai" />
            
              <p class="site-author-name" itemprop="name">Lai</p>
              <p class="site-description motion-element" itemprop="description">Thanks for visiting my blog, lucky one!!!</p>
          </div>

          <nav class="site-state motion-element">

            
              <div class="site-state-item site-state-posts">
              
                <a href="/archives/">
              
                  <span class="site-state-item-count">4</span>
                  <span class="site-state-item-name">日志</span>
                </a>
              </div>
            

            

            
              
              
              <div class="site-state-item site-state-tags">
                <a href="/tags/index.html">
                  <span class="site-state-item-count">1</span>
                  <span class="site-state-item-name">标签</span>
                </a>
              </div>
            

          </nav>

          

          

          
          

          
          

          

        </div>
      </section>

      

      

    </div>
  </aside>


        
      </div>
    </main>

    <footer id="footer" class="footer">
      <div class="footer-inner">
        <div class="copyright">&copy; <span itemprop="copyrightYear">2018</span>
  <span class="with-love">
    <i class="fa fa-user"></i>
  </span>
  <span class="author" itemprop="copyrightHolder">Lai</span>

  
</div>


  <!-- rel="noopener" stops the page opened in the new tab from reaching back through window.opener. -->
  <div class="powered-by">由 <a class="theme-link" target="_blank" rel="noopener" href="https://hexo.io">Hexo</a> 强力驱动</div>



  <span class="post-meta-divider">|</span>



  <!-- rel="noopener" stops the page opened in the new tab from reaching back through window.opener. -->
  <div class="theme-info">主题 &mdash; <a class="theme-link" target="_blank" rel="noopener" href="https://github.com/iissnan/hexo-theme-next">NexT.Muse</a> v5.1.4</div>




        







        
      </div>
    </footer>

    
      <div class="back-to-top">
        <i class="fa fa-arrow-up"></i>
        
      </div>
    

    

  </div>

  

<script type="text/javascript">
  // Null out window.Promise unless its internal class tag reports a function.
  // NOTE(review): presumably so later scripts treat a non-function Promise as absent — confirm.
  (function (win) {
    var promiseTag = Object.prototype.toString.call(win.Promise);
    if (promiseTag === '[object Function]') {
      return;
    }
    win.Promise = null;
  })(window);
</script>









  












  
  
    <script type="text/javascript" src="/lib/jquery/index.js?v=2.1.3"></script>
  

  
  
    <script type="text/javascript" src="/lib/fastclick/lib/fastclick.min.js?v=1.0.6"></script>
  

  
  
    <script type="text/javascript" src="/lib/jquery_lazyload/jquery.lazyload.js?v=1.9.7"></script>
  

  
  
    <script type="text/javascript" src="/lib/velocity/velocity.min.js?v=1.2.1"></script>
  

  
  
    <script type="text/javascript" src="/lib/velocity/velocity.ui.min.js?v=1.2.1"></script>
  

  
  
    <script type="text/javascript" src="/lib/fancybox/source/jquery.fancybox.pack.js?v=2.1.5"></script>
  


  


  <script type="text/javascript" src="/js/src/utils.js?v=5.1.4"></script>

  <script type="text/javascript" src="/js/src/motion.js?v=5.1.4"></script>



  
  

  

  


  <script type="text/javascript" src="/js/src/bootstrap.js?v=5.1.4"></script>



  


  




	





  





  












  





  

  

  

  
  

  
  
    <script type="text/x-mathjax-config">
      // Configure the tex2jax preprocessor, which locates TeX math delimiters in the page text.
      MathJax.Hub.Config({
        tex2jax: {
          // Inline math is delimited by $...$ or \(...\).
          inlineMath: [ ['$','$'], ["\\(","\\)"]  ],
          // Per the MathJax 2 tex2jax documentation, lets \$ stand for a literal dollar sign.
          processEscapes: true,
          // Contents of these tags are not scanned for math.
          skipTags: ['script', 'noscript', 'style', 'textarea', 'pre', 'code']
        }
      });
    </script>

    <script type="text/x-mathjax-config">
      // Queued callback: append the ' has-jax' class to the parent of each math source element.
      MathJax.Hub.Queue(function() {
        MathJax.Hub.getAllJax().forEach(function(jax) {
          var container = jax.SourceElement().parentNode;
          container.className += ' has-jax';
        });
      });
    </script>
    <!-- Explicit https: the protocol-relative form fails under file:// and allows a plain-http load. -->
    <!-- NOTE(review): third-party script loaded without Subresource Integrity; consider adding integrity/crossorigin or self-hosting MathJax. -->
    <script type="text/javascript" src="https://cdn.bootcss.com/mathjax/2.7.1/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
  


  

  

</body>
</html>
